import scrapy
import pymysql

class OpacSpider(scrapy.Spider):
    """Fill in catalogue fields for pending `book` rows from opac.nlc.cn.

    The spider repeatedly picks one row of the `book` table whose `status`
    is '0', searches the catalogue by that row's ISBN, stores the scraped
    `clc`, `zqxt` and `nqty` values, sets `status` to '1', and then moves on
    to the next pending row. The chain stops when no pending row is left or
    when the search form URL cannot be found on the page.
    """

    name = 'opac'
    allowed_domains = ['opac.nlc.cn']
    start_urls = ['http://opac.nlc.cn/F']
    # NOTE(review): the connection is opened while the class body executes
    # (i.e. at import time) and is shared by every instance; credentials are
    # hard-coded. Consider moving both into the project settings.
    connect = pymysql.connect(host='127.0.0.1', user='root', passwd='root', db='douban2',port=3306)
    cursor = connect.cursor()

    _PENDING_SQL = "SELECT * FROM `book` WHERE `status` = '0' LIMIT 1"
    # Values are passed as parameters so scraped text containing quotes can
    # neither break nor inject into the statement. A missing value (None) is
    # stored as NULL.
    _UPDATE_SQL = (
        "UPDATE `book` SET `clc` = %s,`zqxt` = %s,`nqty` = %s,`status` = '1' "
        "WHERE `isbn13` = %s"
    )
    _SEARCH_QUERY = '?func=find-b&find_code=ISB&request='

    def parse(self, response):
        """Read the search form URL from the start page and queue the first lookup.

        Args:
            response: Response for the catalogue start page.

        Yields:
            A single ISBN search request, or nothing when there is no pending
            book or the form action cannot be found.
        """
        form_url = response.xpath('//*[@id="indexpage"]/form/@action').extract_first()
        request = self._next_request(form_url)
        if request is not None:
            yield request

    def getinfo(self, response ,isbn):
        """Store the scraped fields for `isbn`, then queue the next lookup.

        Args:
            response: Response for the ISBN search result page.
            isbn: ISBN that was searched; used to locate the row to update.

        Yields:
            The search request for the next pending book, if any.
        """
        # Chinese Library Classification number.
        clc = response.xpath('//*[@class="td1"][contains(text(), "中图分类号")]/..//a/text()').extract_first()
        # Physical description field.
        zqxt = response.xpath('//*[@class="td1"][contains(text(), "载体形态项")]/..//*[@align="left"]/text()').extract_first()
        # Content summary.
        nqty = response.xpath('//*[@class="td1"][contains(text(), "内容提要")]/..//td[2]/text()').extract_first()

        try:
            self.cursor.execute(self._UPDATE_SQL, (clc, zqxt, nqty, isbn))
            self.connect.commit()
        except pymysql.MySQLError:
            # Roll back so the connection stays usable, and record why the
            # row was not updated instead of failing silently.
            self.connect.rollback()
            self.logger.exception('Failed to update book with ISBN %s', isbn)
            # NOTE(review): the row keeps status '0', so it will be selected
            # again by the next lookup.
        else:
            self.logger.info(
                'Updated book with ISBN %s: clc=%r, zqxt=%r, nqty=%r',
                isbn, clc, zqxt, nqty,
            )

        script_text = response.xpath('/html/head/script[2]/text()').extract_first()
        # NOTE(review): the form URL is taken from a fixed character range of
        # the second <head> script; presumably that script embeds the current
        # session URL at this offset - confirm against the live page.
        form_url = script_text[9:89] if script_text else None
        request = self._next_request(form_url)
        if request is not None:
            yield request

    def closed(self, reason):
        """Close the database cursor and connection when the spider closes.

        Args:
            reason: Close reason supplied by Scrapy (unused).
        """
        try:
            self.cursor.close()
            self.connect.close()
        except pymysql.MySQLError:
            # The connection may already be closed; nothing left to release.
            self.logger.debug('Database connection was already closed')

    def _next_request(self, form_url):
        """Build the search request for the next pending book.

        Args:
            form_url: Base URL of the catalogue search form.

        Returns:
            A `scrapy.Request` for the next pending ISBN, or None when
            `form_url` is missing/empty or no pending row exists.
        """
        if not form_url:
            self.logger.error('Search form URL not found; stopping the lookup chain')
            return None

        self.cursor.execute(self._PENDING_SQL)
        row = self.cursor.fetchone()
        if row is None:
            self.logger.info('No pending books left')
            return None

        # Third column of `book`; presumably `isbn13`, since the UPDATE
        # matches on that column - verify against the table schema.
        isbn = row[2]
        return scrapy.Request(
            url='{}{}{}'.format(form_url, self._SEARCH_QUERY, isbn),
            cb_kwargs=dict(isbn=isbn),
            callback=self.getinfo,
        )
